/*******************************************************************************
*Project :          COVID-19 Research Project 

*Purpose : 		    Summary : Second Measure

*Source : 			Womply Data from AWS 


*******************************************************************************/

clear all 
set more off 
capture log close 

* generate global date
global date=subinstr(c(current_date), " ", "_", .)
set obs 1 
gen date="${date}"
split date, parse("_")
cap replace date=date3+"_"+date2+"_"+date4 if date1==""
cap replace date=date2+"_"+date1+"_"+date3 if date1!=""
global date=date[1]
display as error "Today's date = ${date}"
 
 
* copy summary table to summary directory
cap noisily erase "tables/summary/summary_second_measure_total.xlsx"
cap noisily erase "tables/summary/summary_second_measure_consumer.xlsx"


copy "tables/templates/summary_template_second_measure_consumer.xlsx" "tables/summary/summary_second_measure_consumer.xlsx"
copy "tables/templates/summary_template_second_measure_total.xlsx" "tables/summary/summary_second_measure_total.xlsx"


*===============================================================================
* Get state level population estimates 
*===============================================================================

* load data 
  u "data/clean/usda_ers/county_information.dta" , clear 
  
* get state fips 
   tostring county_fips, replace 
   replace county_fips = "0" + county_fips if length(county_fips)==4 
   g state_fips = substr(county_fips,1,2) 
   destring state_fips , replace 
   format state_fips %02.0f
   
* aggregate population to state level 
   bys state_fips : egen pop_2018 = total(pop_estimate_2018)
   bys state_fips : keep if _n == 1 
   
* keep only certain variables 
   keep state_fips pop_2018 
   
* save 
  save "data/clean/usda_ers/state_pop.dta" , replace 
 
   

 
*===============================================================================
* Get summary - total 
*===============================================================================

* load second measure daily sales data 
  use "data/clean/second_measure/sector_geo_daily_sales.dta" , clear 
* drop overall 
  drop if inlist(sector, "overall" , "retail" , "non-retail", "restaurants")     
  
* get population values  
  merge m:1 state_fips using  "data/clean/usda_ers/state_pop.dta" , assert(2 3) keep(3) nogen 
  
  encode(sector) , gen(sec) 
  lab list sec 
  drop sector naics2  customers_sector 
  
* reshape data 
  rename sales sales_
  reshape wide sales_  , i(date state_fips) j(sec) 
  
  xtset date state_fips 
  tsfill , full 
  xtset date state_fips

  
* replace missing values , generate per capita
  foreach var of varlist sales_* { 
	replace `var' = 0 if missing(`var')
	}
	
* get overall value 
  egen sales_22 = rowtotal(sales_*)
  replace sales_22 = sales_22 - (sales_2 + sales_8 + sales_9 + sales_15) 
  
* reshape again 
  reshape long  sales_ , i(date state_fips) j(sec)
  
* collapse data 
  ren sales_ sales 
  
* subset data to March 2020 
  keep if inrange(date, td(01mar2020) , td(07mar2020))
 
* collapse data 
  collapse (mean) avg_sales = sales ///
           (median) med_sales = sales ///
		   (p10)  p10_sales = sales ///
		   (p90)  p90_sales = sales ///
		   (sd)  sd_sales  = sales , /// 
		    by(sec) 
			
  decode(sec) , gen(sector)
  replace sector = "overall" if sec == 22 
   
* format category 
 replace sector = proper(sector)
 replace sector  = subinstr(sector, "And" , "and" ,.) 
 sort sector, stable 
  
* sort and export 
  g sorting = "w"        if sector == "Grocery Stores" 
  replace sorting = "x"  if sector == "Pharmacies" 
  replace sorting = "y"  if sector == "Amazon" 
  replace sorting = "z"  if sector == "Overall" 
  
  sort sorting  , stable 
  drop sorting 

* keep certain variables 
  drop sec
  order sector

* export file 
export excel using "tables/summary/summary_second_measure_total.xlsx" , cell(A4) sheetmodify ///
	   keepcellfmt 
 
 
 *===============================================================================
* Get summary - per consumer
*===============================================================================

* load second measure daily sales data 
  use "data/clean/second_measure/sector_geo_daily_sales.dta" , clear 
  
* drop overall 
  drop if inlist(sector, "overall" , "retail" , "non-retail", "restaurants")     
	
* get population values 
  merge m:1 state_fips using "data/clean/usda_ers/state_pop.dta"  , assert(2 3) keep(3) nogen 
  
  encode(sector) , gen(sec) 
  lab list sec 
  drop sector naics2  
  
* reshape data 
  rename sales_sector sales_
  rename customers_sector customers_
  reshape wide sales_ customers_ , i(date state_fips) j(sec) 
  
  xtset  state_fips date
  tsfill , full 
  xtset state_fips date 
  
* replace missing values , generate per consumer
  foreach var of varlist sales_* customers_* { 
	replace `var' = 0 if missing(`var')
	}
	
* get total no. of customers in a state on March 1st 
  egen total_customers = rowtotal(customers_*) 
  replace total_customers = total_customers - (customers_2 + customers_8 + customers_9 + customers_15) 
  drop customers_* 
  g customers = total_customers if inrange(date,td(01mar2020) , td(07mar2020)) 
  bys state : egen avg_customers  = mean(customers) 
	
* save a file with average customers to pull into event-studies 
  preserve 
   keep state_fips avg_customers 
   bys state_fips : keep if _n == 1 
   save "data/clean/second_measure/avg_customers.dta" , replace
  restore 
  
  
  forv i = 1/21 { 
	gen pc_sales_`i' = (sales_`i'/avg_customers)
	} 
	
* get overall value 
  egen sales_22 = rowtotal(sales_*) 
  replace sales_22 = sales_22 - (sales_2 + sales_8 + sales_9 + sales_15)
  gen pc_sales_22 = sales_22/avg_customers 

  
* reshape again 
  reshape long pc_sales_ sales_  , i(date state_fips) j(sec)
  
* collapse data 
  ren pc_sales_ pc_sales
  
  
* subset data to March 2020 
  keep if inrange(date, td(01mar2020) , td(07mar2020))
 
* collapse data 
  collapse (mean) avg_sales = pc_sales ///
           (median) med_sales = pc_sales ///
		   (p10)  p10_sales = pc_sales ///
		   (p90)  p90_sales = pc_sales ///
		   (sd)  sd_sales  = pc_sales [weight = avg_customers]  , /// 
		   by(sec) 
			
  decode(sec) , gen(sector)
  replace sector = "overall" if sec == 22 
   
* format category 
 replace sector = proper(sector)
 replace sector  = subinstr(sector, "And" , "and" ,.) 
 sort sector , stable 
  
* sort and export 
  g sorting = "w"        if sector == "Grocery Stores" 
  replace sorting = "x"  if sector == "Pharmacies" 
  replace sorting = "y"  if sector == "Amazon" 
  replace sorting = "z"  if sector == "Overall" 
  
  sort sorting  , stable 
  drop sorting 

* keep certain variables 
  drop sec
  order sector

* export file 
export excel using "tables/summary/summary_second_measure_consumer.xlsx" , cell(A4) sheetmodify ///
	   keepcellfmt 
